## ----loadlibs, echo = FALSE, message = FALSE, warning = FALSE, results = "hide"----
## Document-wide knitr chunk defaults, applied to every chunk unless a
## chunk header overrides them.
knitr::opts_chunk$set(
    echo = FALSE,
    results = "hide",
    message = FALSE,
    warning = FALSE,
    error = FALSE
)
library(tidyverse)
library(pander)
library(RcppRoll)
library(reshape2)
library(tidyr)

source("common_funcs.R")


## ----loaddata------------------------------------------------------------
## Sheet 1: one row of case characteristics per decided case
sheet1 <- read.csv("./uksc_decided_sheet1.csv", as.is = TRUE)

## Sheet 2: opinion/writing characteristics for the same cases
sheet2 <- read.csv("./uksc_decided_sheet2.csv", as.is = TRUE)

## The sheets are glued together by position below, so their NEUTRAL
## columns have to agree row for row
stopifnot(all(sheet1$NEUTRAL == sheet2$NEUTRAL))

## Append every sheet2 column except the first (presumably NEUTRAL --
## confirm against the CSV), then parse the date columns
sheet1 <- cbind(sheet1, sheet2[-1]) %>%
    mutate(HANDDOWN = as.Date(HANDDOWN)) %>%
    mutate_at(vars(starts_with("HEARINGDATE")),
              as.Date, format = "%Y-%m-%d")

## JCPC decisions: empty strings are read as NA; hand-down dates are
## written like "01 Jan 2015", hearing dates use as.Date()'s default parsing
jcpc <- read.csv("./jcpc_decisions_tidied.csv",
                 as.is = TRUE,
                 na.strings = c("", "NA")) %>%
    mutate(HANDDOWN = as.Date(HANDDOWN, "%d %b %Y")) %>%
    mutate_at(vars(starts_with("HEARINGDATE")),
              as.Date)

## Office holders: one row per judge and role, with start and end dates
office <- read.csv("./office_holders.csv", as.is = TRUE)
office$Start <- as.Date(office$Start)
office$End <- as.Date(office$End)

## An end date of 9999-12-31 is replaced by today's date
office$End <- replace(office$End,
                      office$End == "9999-12-31",
                      Sys.Date())

## Create workloads

### For each judge (numeric ids 1 to 20): identify the UKSC and JCPC cases
### they sat on, evaluate their workload on every hearing / hand-down date
### of those cases, and expand the result to a daily series.
### Open question from the original author: is it a problem this doesn't
### take into account recent cases?
### The result is cached in cache/workload.rds; when that file exists it is
### read back instead of being recomputed.
if (file.exists("cache/workload.rds")) {
    individualWorkload <- readRDS("cache/workload.rds")
} else {
    ## Workload on day `d` among the cases in `df`: the number of distinct
    ## (non-missing) hearing dates belonging to cases whose first hearing
    ## is on or before `d` and whose hand-down is after `d`.
    ## Always returns a single integer (0L when nothing is pending).
    get_workload <- function(d, df) {
        d <- as.Date(d)
        df %>%
            filter(HEARINGDATE1 <= d &
                   HANDDOWN > d) %>%
            dplyr::select("CASEID",
                          starts_with("HEARINGDATE")) %>%
            melt(id.vars = "CASEID") %>%
            summarise(workload = length(na.omit(unique(value)))) %>%
            pull(workload)
    }

    individualWorkload <- lapply(1:20, function(j) {

        ## Cases in which judge j sat (any JUDGE* column equals j)
        relevantCases <- sheet1 %>%
            dplyr::select(CASEID, starts_with("JUDGE")) %>%
            melt(id.vars = "CASEID",
                 value.name = "Judge") %>%
            filter(!is.na(Judge)) %>%
            filter(Judge == j) %>%
            pull(CASEID)

        ## NOTE(review): starts_with("J") picks up every jcpc column whose
        ## name begins with "J" -- confirm these are all judge columns
        relevantCasesJCPC <- jcpc %>%
            dplyr::select(CASEID, starts_with("J")) %>%
            melt(id.vars = "CASEID",
                 value.name = "Judge") %>%
            filter(Judge == j) %>%
            pull(CASEID)

        ## Every hearing or hand-down date of those cases
        relevantDates <- sheet1 %>%
            filter(CASEID %in% relevantCases) %>%
            dplyr::select(CASEID, HANDDOWN, starts_with("HEARINGDATE")) %>%
            melt(id.vars = "CASEID", value.name = "Date") %>%
            pull(Date) %>%
            na.omit()

        relevantDatesJCPC <- jcpc %>%
            filter(CASEID %in% relevantCasesJCPC) %>%
            dplyr::select(CASEID, HANDDOWN, starts_with("HEARINGDATE")) %>%
            melt(id.vars = "CASEID", value.name = "Date") %>%
            pull(Date) %>%
            na.omit()

        relevantDates <- unique(c(relevantDates, relevantDatesJCPC))

        ## The code below needs exactly one start/end date per judge;
        ## fail with a clear message instead of a cryptic seq() error
        matchpos <- which(office$Judge == j & office$Role == "Member")
        if (length(matchpos) != 1) {
            stop("Expected exactly one 'Member' row in office_holders.csv ",
                 "for judge ", j, ", found ", length(matchpos))
        }
        startdate <- office$Start[matchpos]
        enddate <- office$End[matchpos]

        ## Keep only dates falling within the judge's membership
        relevantDates <- subset(relevantDates,
                                relevantDates <= enddate &
                                relevantDates > startdate)

        ## vapply() guarantees an integer vector even when there are no
        ## dates at all (sapply() would return an empty list)
        judgeload1 <- vapply(relevantDates,
                             get_workload,
                             integer(1),
                             df = sheet1 %>%
                                 filter(CASEID %in% relevantCases))
        judgeload2 <- vapply(relevantDates,
                             get_workload,
                             integer(1),
                             df = jcpc %>%
                                 filter(CASEID %in% relevantCasesJCPC))

        ## Expand this: add a grid of every second day across the
        ## membership period, carry the last observed workload forward and
        ## treat days before the first observation as zero workload.
        ## rep() keeps the frame well-formed when there are no dates.
        retval <- data.frame(Judge = rep(j, length(relevantDates)),
                             Date = relevantDates,
                             workloadUKSC = judgeload1,
                             workloadJCPC = judgeload2)
        retval <- tidyr::complete(data = retval,
                                  Date = seq(startdate, enddate, by = 2),
                                  fill = list(Judge = j))
        retval %>%
            arrange(Date) %>%
            fill(workloadUKSC) %>%
            fill(workloadJCPC) %>%
            mutate(workloadUKSC = coalesce(workloadUKSC, 0L),
                   workloadJCPC = coalesce(workloadJCPC, 0L))
    })

    individualWorkload <- bind_rows(individualWorkload) %>%
        mutate(workloadTotal = workloadUKSC +
                   workloadJCPC)

    ## Interpolate individual workload to one row per judge per day.
    ## No ungroup() is applied, so the result stays grouped by Judge.
    individualWorkload <- individualWorkload %>%
        group_by(Judge) %>%
        tidyr::complete(Date = full_seq(Date, 1)) %>%
        tidyr::fill(workloadUKSC,
                    workloadJCPC,
                    workloadTotal)

    ## saveRDS() does not create missing directories
    dir.create("cache", showWarnings = FALSE, recursive = TRUE)
    saveRDS(individualWorkload,
            file = "cache/workload.rds")
}

## Court-wide workload: for every date, add up the workloads of all
## judges (missing values are ignored)
combWorkload <- summarize(
    group_by(individualWorkload, Date),
    workloadUKSC = sum(workloadUKSC, na.rm = TRUE),
    workloadJCPC = sum(workloadJCPC, na.rm = TRUE),
    workloadTotal = sum(workloadTotal, na.rm = TRUE)
)

## Law-report data; each appealed order may appear at most once
reports <- read.csv("./reports_data.csv")
if (anyDuplicated(reports$APPORDNEUTRAL) > 0) {
    stop("Duplicated reports")
}

## Keep every sheet1 row and attach report data where available.
## No `by` is given, so merge() joins on all column names the two
## data frames have in common.
sheet1 <- merge(x = sheet1,
                y = reports,
                all.x = TRUE,
                all.y = FALSE)

## Deal with the area of the case
## First choice: derive it from the court that made the order appealed.
sheet1$Area <- as.character(ct2area(sheet1$APPORDCOURT))

## Fallback: derive it from the first-instance court.
## Convert to character *before* assigning: subassigning a factor into a
## character vector stores its integer codes rather than its labels.
## (Whether ct2area() returns a factor is not visible here; as.character()
## is harmless if it already returns character.)
area.missing <- is.na(sheet1$Area)
sheet1$Area[area.missing] <-
    as.character(ct2area(sheet1$FICOURT[area.missing]))

### For those remaining NAs
### First, public authority -> Public
public.auth <- sheet1$APPTYPE %in% c("D", "E", "J") |
    sheet1$RESPTYPE %in% c("D", "E", "J")

sheet1$Area[which(is.na(sheet1$Area) & public.auth)] <- "Public"

## Company and no public authority -> Civil
companies <- sheet1$APPTYPE %in% c("B", "C") |
    sheet1$RESPTYPE %in% c("B", "C")

sheet1$Area[which(is.na(sheet1$Area) & companies)] <- "Civil"

### Reference questions -> Public
### which() drops rows where REFQN is NA, like the two rules above
sheet1$Area[which(is.na(sheet1$Area) & sheet1$REFQN == "Yes")] <- "Public"

## Hand-coded corrections: every sheet1 row whose NEUTRAL matches a patch
## row gets that patch's LegalArea; later patch rows win over earlier ones
patches <- read.csv("./patches.csv",
                    as.is = TRUE)

## seq_len() so that an empty patches file means no iterations
for (i in seq_len(nrow(patches))) {
    matchpos <- which(sheet1$NEUTRAL == patches$NEUTRAL[i])
    sheet1$Area[matchpos] <- patches$LegalArea[i]
}




## ----opinionbelow--------------------------------------------------------
### 
### Dissent in decided cases
###

### Lower-court dissent data, one row per appealed order as recorded.
### Rows without an APPORDNEUTRAL are dropped; after narrowing to the
### columns below, distinct() removes cases which appear twice under
### different names.
dissent <- read.csv("./appealled_orders_dissent.csv", as.is = TRUE) %>%
    filter(!is.na(APPORDNEUTRAL)) %>%
    rename(nDissents.appord = nDissents,
           nJudges.appord = nJudges) %>%
    dplyr::select(APPORDNEUTRAL,
                  nDissents.appord,
                  nJudges.appord,
                  UKSCJudge1, UKSCDissent1,
                  UKSCJudge2, UKSCDissent2,
                  UKSCJudge3, UKSCDissent3) %>%
    distinct()

### Deal with UKSC judges recorded on a lower-court panel before their
### time on the UKSC: if the order appealed pre-dates the judge's earliest
### start as a "Member", blank out UKSCJudge1 / UKSCDissent1.
### NOTE(review): only the "before" case and only the first judge slot are
### handled here; UKSCJudge2/3 are left untouched.
office_holders <- read.csv("./office_holders.csv",
                           header = TRUE)
office_holders <- subset(office_holders,
                         Role == "Member")
office_holders$Start <- as.Date(office_holders$Start)
office_holders$End <- as.Date(office_holders$End)

pos <- which(!is.na(dissent$UKSCJudge1))
for (i in pos) {
    ## Date of the order appealed, looked up via the first matching
    ## sheet1 row (NA when the order does not appear in sheet1)
    theMatch <- match(dissent$APPORDNEUTRAL[i], sheet1$APPORDNEUTRAL)
    theDate <- as.Date(sheet1$APPORDDATE[theMatch])
    theJudge <- dissent$UKSCJudge1[i]
    their_Start <- min(office_holders$Start[which(office_holders$Judge == theJudge)])
    ## Scalar condition, so use short-circuiting && rather than vector &
    if (!is.na(theDate) && theDate < their_Start) {
        dissent$UKSCJudge1[i] <- NA
        dissent$UKSCDissent1[i] <- NA
    }
}

### Code the recorded view of the (first) UKSC judge on the lower panel:
### 1 if UKSCDissent1 is "Yes", -1 if it is "No", 0 otherwise
dissent$UKSCJudge_view <- 0
dissent$UKSCJudge_view[which(dissent$UKSCDissent1 == "Yes")] <- 1
dissent$UKSCJudge_view[which(dissent$UKSCDissent1 == "No")] <- -1

### In the case of multiple cases corresponding to one neutral
### take the maximum value (and the mean of the judge's view).
### group_by()/summarize() are exported, so use `::` not `:::`.
### max(..., na.rm = TRUE) gives -Inf (with a warning) when every value
### for a neutral citation is missing.
dissent <- dissent %>%
    dplyr::group_by(APPORDNEUTRAL) %>%
    dplyr::summarize(nDissents.appord = max(nDissents.appord, na.rm = TRUE),
                     nJudges.appord = max(nJudges.appord, na.rm = TRUE),
                     UKSCJudge_view = mean(UKSCJudge_view))

### Merge with sheet1
### The current row order is recorded in `ordervar` and restored after
### the merge; the merge must not add or drop rows.
old.nrow <- nrow(sheet1)
sheet1$ordervar <- seq_len(old.nrow)
sheet1 <- merge(sheet1, dissent,
                by = "APPORDNEUTRAL",
                all.x = TRUE,
                all.y = FALSE,
                sort = FALSE)

new.nrow <- nrow(sheet1)
if (old.nrow != new.nrow) {
    stop("Merging with dissents has introduced new rows in the data")
}
sheet1 <- sheet1[order(sheet1$ordervar), ]
sheet1$ordervar <- NULL

### Stop with an error if there is a case with an APPORDNEUTRAL
### but no nDissents
problematic.cases <- which(!is.na(sheet1$APPORDNEUTRAL) & is.na(sheet1$nDissents.appord))
if (length(problematic.cases) > 0) {
    stop("There are cases for which we lack information on lower-court dissent")
}

### Build the "opinion below" measure: a prior probability derived from
### how the lower-court judges decided, following the rule of succession
### (https://en.wikipedia.org/wiki/Rule_of_succession):
###     (1 + judges against the order) / (2 + judges who heard the case)

### Step 1: where a first-instance court is recorded, count one extra
### judge as having heard the case (one being the modal number of judges
### in the High Court)
sheet1$nJudges.appord <- sheet1$nJudges.appord +
    as.numeric(!is.na(sheet1$FICOURT))

### Step 2: where the appeal court's disposal contains "A" (it overturned
### the initial decision), count that extra judge as a "dissenter"
overturned <- grep("A", sheet1$APPORDDISP)
sheet1$nDissents.appord[overturned] <- 1 + sheet1$nDissents.appord[overturned]

### Step 3: the measure itself and its mirror image
sheet1$OpinionBelow <- (sheet1$nDissents.appord + 1) /
    (sheet1$nJudges.appord + 2)
sheet1$OpinionBelowReversed <-
    ((sheet1$nJudges.appord - sheet1$nDissents.appord) + 1) /
    (sheet1$nJudges.appord + 2)

### Size of the UKSC panel: number of non-missing JUDGE1..JUDGE11 entries
sheet1$nJudges <- apply(!is.na(sheet1[, paste0("JUDGE", 1:11)]),
                        1,
                        sum)


## ----rulesucctable, fig = TRUE, results = "asis"-------------------------
### Frequency table of the opinion-below measure: one row per observed
### combination of judges below, judges "dissenting" below and the
### resulting measure. Cases with a missing or non-finite judge count
### are left out.
opiniontab <- sheet1 %>%
    filter(!is.na(nJudges.appord),
           is.finite(nJudges.appord)) %>%
    group_by(nJudges.appord, nDissents.appord, OpinionBelow) %>%
    summarize(Frequency = n())

### Sort by the measure and give the columns display names
opiniontab <- opiniontab[order(opiniontab$OpinionBelow), ]
names(opiniontab) <- c("Heard the case",
                       "Found for the appellants",
                       "Measure",
                       "# cases")

pander(opiniontab,
       digits = c(0, 0, 3, 0),
       caption = "Illustration of the opinion below variable")



## ----judgespecialisms----------------------------------------------------
### Judge specialisms: one input row per judge and career "spell", with a
### numeric value for each legal area. Spells are combined into a single
### duration-weighted profile per judge.
spec <- read.csv("./judge-specialties.csv")

### Calculate weight to assign to each spell: its share of the judge's
### total recorded spell duration (in days)
spec$SpellStart <- as.Date(spec$SpellStart, format = "%Y-%m-%d")
spec$SpellEnd <- as.Date(spec$SpellEnd, format = "%Y-%m-%d")
spec$SpellDurat <- as.numeric(spec$SpellEnd - spec$SpellStart)

spec <- spec %>%
    group_by(Judge, Name) %>%
    mutate(Weight = SpellDurat / sum(SpellDurat, na.rm = TRUE))

### Spells whose weight cannot be computed get full weight
spec$Weight[is.na(spec$Weight)] <- 1
spec <- spec %>%
    group_by(Judge, Name) %>%
    summarize(
        Public = sum(Public * Weight),
        Family = sum(Family * Weight),
        Criminal = sum(Criminal * Weight),
        Tax.and.Chancery = sum(Tax.and.Chancery * Weight),
        Civil = sum(Civil * Weight),
        Scots = sum(Scots * Weight),
        NI = sum(NI * Weight)
    )

### Deal with Sumption: his profile is replaced by the average profile of
### the judges with no Scots and no NI weight (presumably because he has
### no usable spells of his own -- confirm).
### The column means are assigned as a list (one element per column):
### tibble >= 3.0 rejects a length-7 vector for a one-row assignment,
### while a list works for both tibbles and plain data frames.
areas <- c("Public", "Family", "Criminal", "Tax.and.Chancery",
           "Civil", "Scots", "NI")
ew.judges <- which(spec$Scots == 0 & spec$NI == 0)
spec[which(spec$Name == "Sumption"), areas] <-
    as.list(colMeans(spec[ew.judges, areas], na.rm = TRUE))

### Deal with temporary English judges (ids 101, 104, 106): same average.
### The mean is deliberately recomputed on each pass, as before, so any
### row already overwritten that sits in ew.judges feeds into later means.
english.temps <- which(spec$Judge %in% c(101, 104, 106))
for (e in english.temps) {
    spec[e, areas] <- as.list(colMeans(spec[ew.judges, areas], na.rm = TRUE))
}

### Deal with Scottish temporary judges: pure Scots profile
scots.judges <- which(spec$Judge %in% c(102, 103, 105, 107, 108))
spec[scots.judges, areas] <- 0
spec[scots.judges, "Scots"] <- 1

### Deal with NI temporary judges: pure NI profile
ni.judges <- which(spec$Judge %in% c(109))
spec[ni.judges, areas] <- 0
spec[ni.judges, "NI"] <- 1


## ----judgespecialismsout, results = "asis"-------------------------------
### Table of specialisms for a selected set of judges, as whole
### percentages, sorted by (shortened) name
display.judges <- c(1, 3, 5:9, 11:14, 17:19)

### Drop the first column and keep only the selected judges
displaytab <- spec[spec$Judge %in% display.judges, -1]
displaytab[, areas] <- round(displaytab[, areas] * 100)

### Strip everything from " of " onwards in the name
displaytab$Name <- gsub(" of .*","",displaytab$Name)

### Sort by name and keep the columns to display
displaytab <- displaytab[order(displaytab$Name),
                         c("Name", "Public", "Family", "Criminal",
                           "Tax.and.Chancery", "Civil")]
names(displaytab)[names(displaytab) == "Tax.and.Chancery"] <- "Tax/Chancery"

pander(displaytab,
       caption = "Judge specialisms")



## ----workloadplot, fig = TRUE, fig.cap = "Workload of selected justices", fig.width = 5, fig.height = 3----
### Smoothed workload over time for two selected judges
judgeseq <- c(8, 15)

### Restrict to those judges and to dates up to 1 June 2016, then smooth
### each judge's total workload with a 7-observation rolling mean
plot.df <- individualWorkload %>%
    filter(Judge %in% judgeseq,
           Date <= as.Date("2016-06-01")) %>%
    group_by(Judge) %>%
    arrange(Date) %>%
    mutate(workloadSmooth = roll_mean(workloadTotal, n = 7, fill = NA))

### One label per judge, placed by default at the peak of the smoothed
### series
annot.df <- plot.df %>%
    group_by(Judge) %>%
    filter(Date == Date[which.max(workloadSmooth)]) %>%
    mutate(judgeName = num2judge(Judge))

### The label for judge 8 is positioned by hand instead
judge8 <- which(annot.df$Judge == 8)
annot.df$Date[judge8] <- as.Date("2011-07-01")
annot.df$workloadSmooth[judge8] <- 42

judge.labels <- geom_text(data = annot.df,
                          aes(label = judgeName),
                          vjust = 0, hjust = 0.5,
                          size = 3,
                          nudge_y = 1)

workload.plot <- ggplot(plot.df,
                        aes(x = Date,
                            y = workloadSmooth,
                            colour = factor(Judge),
                            linetype = factor(Judge))) +
    geom_line() +
    judge.labels +
    scale_x_date("Date") +
    scale_y_continuous("Workload\n(days of hearing in outstanding cases)") +
    scale_colour_brewer(guide = "none", palette = "Set1") +
    scale_linetype_discrete(guide = "none") +
    theme_uksc()

print(workload.plot)


## ----reports, results = "asis"-------------------------------------------
### Count, for every case, the law-report series in which the decision
### below was reported. `Reported` is a ";"-separated list of citations.
sheet1$Reported <- gsub("Where Reported: ","",sheet1$Reported)

### Entries that are not law reports (transcripts and neutral citations)
kill.list <- c("Official Transcript", "NIQB", "NICA", "HCJAC",
               "EWHC (Ch)", "EWHC (QB)", "EWCA Civ", "EWCA Crim",
               "EWHC (Admin)", "CSIH")

### Reduce every citation to the name of its report series
unique.reports <- lapply(strsplit(sheet1$Reported, ";"), function(x) {
    ## keep letters, spaces and brackets only (drops years, volumes, pages)
    x <- gsub("[^A-Za-z ()]","",x)
    x <- trim(x)
    ## "Times <Month>" -> "Times"
    x <- gsub("Times [A-Z][a-z]+","Times",x)
    ## remove brackets left empty by the clean-up
    x <- gsub("\\(\\) ?","",x)
    ## A case with no report data (NA) or a citation that cleans down to
    ## an empty string is not a report; without this step each would be
    ## counted as one specialist report below
    x <- x[!is.na(x) & nzchar(x)]
    x[!is.element(x, kill.list)]
})

### Series treated as generalist; everything else counts as specialist
generalist <- c("All ER", "WLR", "SJLB", "Times", "SLT", "SC", "GWD", "NI")
generalist.count <- lapply(unique.reports, function(x)
    x[is.element(x, generalist)])
specialist.count <- lapply(unique.reports, function(x)
    x[!is.element(x, generalist)])

### Provisional value, overwritten at the end of this chunk; assigned
### here so the column keeps its position in sheet1
sheet1$Importance <- lengths(generalist.count)
sheet1$Specialist.count <- lengths(specialist.count)

### Per case, the citations belonging to the given series
reports_in_series <- function(series) {
    lapply(unique.reports, function(x) x[is.element(x, series)])
}

times.count <- reports_in_series("Times")
aller.count <- reports_in_series("All ER")
wlr.count <- reports_in_series("WLR")
sjlb.count <- reports_in_series("SJLB")
gwd.count <- reports_in_series("GWD")
slt.count <- reports_in_series("SLT")
sc.count <- reports_in_series("SC")
ni.count <- reports_in_series("NI")

sheet1$times.count <- lengths(times.count)
sheet1$aller.count <- lengths(aller.count)
sheet1$wlr.count <- lengths(wlr.count)
sheet1$sjlb.count <- lengths(sjlb.count)
### SJLB is capped at one per case
sheet1$sjlb.count[sheet1$sjlb.count > 1] <- 1
sheet1$gwd.count <- lengths(gwd.count)
sheet1$slt.count <- lengths(slt.count)
sheet1$sc.count <- lengths(sc.count)
sheet1$ni.count <- lengths(ni.count)

### Importance = number of generalist reports of the case.
### NOTE(review): ni.count is not part of the sum although "NI" is listed
### as a generalist series -- confirm this is intended.
sheet1$Importance <- rowSums(sheet1[, c("times.count", "aller.count",
                                        "wlr.count", "sjlb.count",
                                        "gwd.count", "slt.count",
                                        "sc.count")])

### Pairwise percentage agreement between four of the report counts:
### for each pair of series, the share of cases (in percent, one decimal)
### in which the two counts are equal.
### Adapted from http://stats.stackexchange.com/questions/28523/how-to-get-percentage-agreement-between-a-group-of-factor-columns
x <- sheet1[, c("times.count", "aller.count", "wlr.count", "sjlb.count")]

### Generalised matrix product: combine each row of `x` with each column
### of `y` elementwise using `f`, then reduce with `g`
mmult <- function(f = `*`, g = sum) {
    function(x, y) {
        apply(y, 2, function(a) {
            apply(x, 1, function(b) {
                g(f(a, b))
            })
        })
    }
}

### With `==` and mean this yields the proportion of matching entries
`%**%` <- mmult(`==`, mean)
out <- t(x) %**% x
out <- round(100 * out, 1)

### Blank out the lower triangle
out[lower.tri(out)] <- NA
rm(`%**%`)


## ----conjoined-----------------------------------------------------------
### Neutral citations covering several cases of which at least one is
### coded "Scots" and at least one is coded as something else
joinScots <- sheet1 %>%
    group_by(NEUTRAL) %>%
    summarise(Cross = (any(Area == "Scots") & any(Area != "Scots"))) %>%
    filter(Cross == TRUE) %>%
    pull(NEUTRAL)

### The same for "N Ireland"
joinNI <- sheet1 %>%
    group_by(NEUTRAL) %>%
    summarise(Cross = (any(Area == "N Ireland") & any(Area != "N Ireland"))) %>%
    filter(Cross == TRUE) %>%
    pull(NEUTRAL)


## ----citesbyarea---------------------------------------------------------
### Area becomes a factor with "Public" as the reference level
sheet1$Area <- relevel(factor(sheet1$Area), "Public")

### Specialist report count as a function of importance, separately by
### area; Scots and N Ireland cases are left out of the fit
mod <- lm(Specialist.count ~ Importance * Area,
          data = subset(sheet1,
                        !(Area %in% c("Scots", "N Ireland"))))

### Predicted specialist counts at Importance = 1 for three areas,
### rounded to one decimal
newdata <- data.frame(Importance = 1,
                      Area = c("Chancery", "Family", "Criminal"))
preds <- round(predict(mod, newdata), 1)


## ----importanceplot, fig = TRUE, fig.cap = "Importance of cases heard by the Supreme Court", fig.width = 5, fig.height = 5----
### Share of all cases at each importance score
plot.df <- sheet1 %>%
    group_by(Importance) %>%
    summarize(Frequency = n() / nrow(sheet1))

### Bar chart of those shares, y axis shown as percentages
import.plot <- ggplot(plot.df, aes(x = factor(Importance), y = Frequency))
import.plot <- import.plot +
    geom_bar(stat = "identity") +
    scale_x_discrete("Case importance") +
    scale_y_continuous("Proportion of cases",
                       labels = scales::percent) +
    theme_uksc()

print(import.plot)



## ----savedata------------------------------------------------------------
### Write the four objects built above to ch2_data.RData
save(list = c("sheet1", "combWorkload",
              "individualWorkload", "spec"),
     file = "./ch2_data.RData")

### NOTE(review): this empties the whole calling environment, not just
### the objects created by this script
rm(list = ls())

